###Replication code - figures
#Electoral rewards and punishments for trade compensation####
#World Politics
#Minju KIM and Robert GULOTTY
#August 26, 2023

###Preparation
library(stringr)
library(dplyr)
library(tidyr)
library(sandwich)
library(survival)
library(AER)
library(readr)
library(lubridate)
library(stargazer)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(maps)
library(marginaleffects)
library(readstata13)
select <- dplyr::select
mutate  <- dplyr::mutate
summarise <- dplyr::summarise
filter <- dplyr::filter

setwd("~/Dropbox/TAAChinaShock/World Politics/replication/")

###########################Figures in the manuscript
#######Figure 1: Binary China Shock Map
master <- read.csv("master.csv", as.is = T)

master_fstat <-
  master %>% select(
    d_rep_0800,
    d_tradeusch_pw,
    d_tradeotch_pw_lag,
    ipw_us2,
    ipw_others2,
    l_shind_manuf_cbp,
    l_sh_routine33,
    l_task_outsource,
    l_sh_empl_f,
    l_sh_popedu_c,
    l_sh_popfborn,
    region,
    Midwest,
    Northeast,
    South,
    West,
    pcert,
    t2,
    votetotal_2000,
    actualcert,
    totalpetitions
  ) %>% na.omit()

countymap <- map_data("county")

shock <- master %>%
  select(fips5, ipw_us2, ipw_others2, t2) %>%
  mutate(shockyes = ifelse(ipw_us2 == 1 & ipw_others2 == 1, 1, 0)) %>%
  filter(t2 == 1)

###Download the county dataset
library(remotes)
install_version("noncensus", "0.1")
library(noncensus)
data(counties)

counties2 <- counties %>%
  select(county_name, state_fips, county_fips) %>%
  mutate(fips5 = as.numeric(paste(state_fips, county_fips, sep = ""))) %>%
  select(-state_fips,-county_fips) %>%
  mutate(county = tolower(county_name)) %>% select(-county_name) %>%
  mutate(subregion = gsub("\\s*county", "", county, ignore.case = TRUE)) %>%
  select(-county)

shock2 <- shock %>% left_join(counties2)

shockcounty <- inner_join(countymap, shock2)

###Basic map
basic <- ggplot(data = shockcounty,
                mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black",
               fill = "white",
               size = 0.15)

ditch_the_axes <- theme(
  axis.text = element_blank(),
  axis.line = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_blank(),
  panel.grid = element_blank(),
  axis.title = element_blank()
)

###Color the high- low-shocked areas to the basic map
shockmap <- basic +
  geom_polygon(data = shockcounty, aes(fill = factor(shockyes)), show.legend = FALSE) +
  geom_polygon(color = "black",
               fill = "NA",
               size = 0.1) +
  ditch_the_axes +
  scale_fill_manual(values = c("white", "black")) +
  labs(title = "US Counties with High China Shock (In Black)",
       subtitle = "Subject to both High US and Non-US Import Penetration")

ggsave(shockmap, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figure1.tiff",
       width=5, height=4)

######Figure 2: Heterogeneous Effects of TAA in High and Low-Shocked Counties
master_fstat$ipw_us2 <- as.factor(master_fstat$ipw_us2)
levels(master_fstat$ipw_us2) <- c("Low", "High")
master_fstat$ipw_others2 <- as.factor(master_fstat$ipw_others2)
levels(master_fstat$ipw_others2) <- c("Low", "High")

###Regression model with the instrument (with predicted TAA certification)
model_int_b <- ivreg(
  d_rep_0800 ~ ipw_us2 * pcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region |
    ipw_others2 * pcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region,
  weights = votetotal_2000,
  data = master_fstat
)

###Calculate the fitted value of the model
ivdf <- master_fstat %>% mutate(ivfitted = model_int_b$fitted)

###Draw the plot based on the fitted value
itplot <- ggplot(data = ivdf, aes(x = pcert, y = ivfitted)) +
  geom_jitter(alpha = .1, width = .1) +
  stat_smooth(
    method = "lm",
    level = 0.99,
    fullrange = T,
    size = 0.3
  ) +
  ylim(-10, 5) +
  facet_grid(. ~ ipw_us2) + theme_classic() + theme(axis.text.x=element_text(colour="black"),axis.text.y=element_text(colour="black"))+
  labs(title = "With instrumented compensation") +
  xlab("Instrumented TAA Certification Rate") +
  ylab("Change in Republican Vote Share")


###Compare the result with no instrument (with actual TAA certification)
model_int_b_actcert <- ivreg(
  d_rep_0800 ~ ipw_us2 * actualcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region |
    ipw_others2 * actualcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region,
  weights = votetotal_2000,
  data = master_fstat
)


###Calculate the fitted value of the model
ivdf2 <- master_fstat %>%
  mutate(ivfitted = model_int_b_actcert$fitted)

###Draw the plot based on the fitted value
itplot_act <- ggplot(data = ivdf2, aes(x = actualcert, y = ivfitted)) +
  geom_jitter(alpha = .1, width = .1) +
  stat_smooth(
    method = "lm",
    level = 0.99,
    fullrange = T,
    size = 0.3
  ) +
  ylim(-10, 5) +
  facet_grid(. ~ ipw_us2) + theme_classic() + theme(axis.text.x=element_text(colour="black"), axis.text.y=element_text(colour="black"))+
  labs(title = "Without instrumented compensation") +
  xlab("Observed TAA Certification Rate") +
  ylab("Change in Republican Vote Share")


##Combine the two in one figure
combined <- ggarrange(itplot,
          itplot_act,
          labels = c("A", "B"),
          nrow = 2)

ggsave(combined, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figure2.tiff",
       width=5, height=8)

###########################Figures in the appendix
#######Figure S2: The DOL’s Certification of Country of Origin
###Download the TAA dataset
taa <-
  read.csv("taapetition_c_2020.csv", na.strings = c("", NA))

###Let's rank by years
byyear <- taa %>% group_by(i_year) %>%
  summarise(
    total = n(),
    na_cty = sum(is.na(Country1)),
    na_cty_pct = na_cty / total * 100
  )

###Filter non-NA values
taa2 <- taa %>% filter(!is.na(Country1))

###Order by countries
bycty <- taa2 %>% group_by(Country1) %>%
  summarise(
    total = n(),
    certified = sum(certified, na.rm = T),
    certified_pct = certified / total
  ) %>%
  arrange(-total)

top10 <- c(bycty[c(1:10), ]$Country1)

top10taa <- taa2 %>% filter(Country1 %in% top10)

top10_cty_y <- top10taa %>% group_by(Country1, i_year) %>%
  summarise(total = n())

###Descriptive plots of by countries
cty <-
  ggplot(data = bycty[c(1:10), ], aes(
    x = reorder(Country1, total),
    y = total,
    label = total
  )) +
  geom_bar(stat = "identity", fill = "skyblue") +
  geom_text(size = 3, hjust = -0.01) +
  coord_flip() +
  theme_bw() +
  labs(y = "Total Number of the DOL Certification", x = "Country",
       title = "Top 10 Countries Certified by the DOL in TAA \n 14,360 cases, 2003-2020")

ggsave(cty, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS2.tiff",
       width=5, height=6)


#######Figure S13: Import Exposure and the Number of TAA Petitions Certified
###Bring the worker information from the TAA data
taazipcz_vote <-read.csv("taazipcz_vote.csv", 
                         as.is = T, na.strings = c("", "NA"))

taazipcz_cut <- taazipcz_vote %>%
  select(D_year,
         Est..No..Workers,
         certified,
         Det.Date,
         Cert.Officer,
         fips) %>%
  filter(Det.Date > as.Date("2004-11-02") &
           Det.Date < as.Date("2008-11-04"))

taazipcz_cut$Est..No..Workers <-
  as.numeric(taazipcz_cut$Est..No..Workers)

##Calculated the total certified workers
taazipcz_cut <- taazipcz_cut %>%
  mutate(certified_wrks = ifelse(certified == 1, Est..No..Workers, 0))

taa_cut_cty <- taazipcz_cut %>% group_by(fips) %>%
  summarise(
    totalwrks = sum(Est..No..Workers, na.rm = T),
    totalpetitions = n(),
    totalcertified = sum(certified, na.rm = T),
    certified_wrks = sum(certified_wrks, na.rm = T)
  )

##Calculate the changes in import penetration
master_t1 <- master %>% filter(t2 == 0) %>%
  select(fips5, d_tradeusch_pw) %>%
  rename(d_tradeusch_pw_t1 = d_tradeusch_pw)

master_t2 <- master %>% filter(t2 == 1) %>%
  select(fips5, d_tradeusch_pw) %>%
  rename(d_tradeusch_pw_t2 = d_tradeusch_pw)

master_t2_2 <- master_t2 %>% left_join(master_t1) %>%
  mutate(d_tradeusch_pw_t2t1 = d_tradeusch_pw_t2 - d_tradeusch_pw_t1)

###Merge the master data and TAA data at the cz level
master_t2_2 <- master_t2_2%>%left_join(taa_cut_cty, by=c("fips5" = "fips"))
         
cshock_certpetitions <-
  ggplot(aes(x = d_tradeusch_pw_t2t1, y = totalcertified), data = master_t2_2) +
  geom_point(size = 0.5,
             alpha = 0.3,
             color = "blue") +
  theme_bw() +
  labs(x = "Changes in import exposure, county-level",
       y = "Total petitions certified (2005-2008)",
       title = "") +
  geom_rug(sides = "b", alpha = .05)

ggsave(cshock_certpetitions,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS13.tiff",
       width=5, height=4)

######S14: Import Exposure and the Number of Workers Certified
cshock_certifiedwrks <-
  ggplot(aes(x = d_tradeusch_pw_t2t1, y = certified_wrks), data = master_t2_2) +
  geom_point(size = 1,
             alpha = 0.3,
             color = "blue") +
  theme_bw() +
  labs(x = "Changes in import exposure, county-level",
       y = "Number of workers certified (2005-2008)",
       title = "") +
  geom_rug(sides = "b", alpha = .05)

ggsave(cshock_certifiedwrks,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS14.tiff",
       width=5, height=4)

######Figure S5: Local Media Coverage of the TAA by Import Exposure
#Bring the news coverage data
news <- read.csv("lexus_nexis_taa_bystate.csv")

#Bring the China Shock data (ADH)
cslong <- read.dta13("workfile_china_long.dta")

#Bring the state data
state <- data.frame(state_abb = state.abb, state_name = state.name)

bystate <- state %>% left_join(news, c("state_abb" = "state")) %>%
  mutate(taa_news_adj = ifelse(!is.na(taa_news), taa_news, 0))

fips <- unique(data.frame(state.fips) %>% select(fips, abb))

cslong <- cslong %>% left_join(fips, by = c("statefip" = "fips"))

csbystate <- cslong %>% group_by(abb) %>%
  summarise(av_d_tradeusch_pw = mean(d_tradeusch_pw))

csbystate <-
  csbystate %>% left_join(bystate, by = c("abb" = "state_abb"))

csbystate <-
  csbystate %>% mutate(coverage_zero = ifelse(taa_news_adj == 0, 1, 0))

#Plot the coverage of news by import exposure
media <-
  ggplot(data = csbystate,
         aes(
           x = av_d_tradeusch_pw,
           y = taa_news_adj,
           label = abb,
           color = as.factor(coverage_zero)
         )) +
  geom_point(size = 0.5) +
  scale_color_manual(values = c("blue", "black")) +
  geom_text_repel(size = 2.5) +
  labs(x = "Import Exposure", y = "Number of Local Media Covering TAA",
       title = "") +
  theme_bw() +
  theme(legend.position = "none")

ggsave(media, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS5.tiff",
       width=5, height=4)

######Figure S6: Frequency of TAA-related Press Releases by States
#Press release data on TAA from 2005 to 2007
pr0507 <-
  read.csv("GrimmercondensedandCoded.csv")

#Delete observations with missing values in TAA speeches
pr0507_taa <- pr0507 %>% filter(!is.na(TAAspeeches))

pr0507_taastate <- pr0507_taa %>% group_by(StateCode) %>%
  summarise(taapr = n())

#Bring the statecode crosswalk
cw <-
  read.csv("icpsr_statecode_abb.csv", as.is = T)

pr0507_taastate <- pr0507_taastate %>%
  left_join(cw, by = c("StateCode" = "statecode"))

pr0507_taastate <- pr0507_taastate %>%
  rename(taapr_0507 = taapr) %>%
  select(-StateCode)

#Bring the population dataset
uspop <- read.csv("USpopulation_bystate.csv")

uspop$X1970 <- as.numeric(gsub(",", "", uspop$X1970))
uspop$X1980 <- as.numeric(gsub(",", "", uspop$X1980))
uspop$X1990 <- as.numeric(gsub(",", "", uspop$X1990))
uspop$X2000 <- as.numeric(gsub(",", "", uspop$X2000))
uspop$X2010 <- as.numeric(gsub(",", "", uspop$X2010))

uspop <- uspop %>%
  mutate(avpop = (X1970 + X1980 + X1990 + X2000 + X2010) / 5)

##Calculate TAA applicants from 1975 (beginning of TAA) to 2004 
taa <-
  read.csv("~/Dropbox/TAA_bureaucracy/Dataset/taapetition_c_2020.csv")
taa$State <- toupper(taa$State)
taa2 <- taa %>% filter(i_year < 2005)

taabyst <- taa2 %>% group_by(State) %>%
  summarise(petition = n(),
            workers_petitioned = sum(EstNumWorkers, na.rm = T))

state <- data.frame(state_abb = state.abb, state_name = state.name)

uspop <- uspop %>% select(state, avpop)

state_pop <-
  state %>% left_join(uspop, by = c("state_name" = "state"))

taa_pop <- taabyst %>%
  left_join(state_pop, by = c("State" = "state_abb")) %>%
  mutate(taademand_pct = workers_petitioned / avpop * 100)

#Merge with the press release data from 2005 to 2007
taa_pop_pr <-
  taa_pop %>% left_join(pr0507_taastate, by = c("State" = "abb")) %>%
  mutate(taapr_0507_adj = ifelse(!is.na(taapr_0507), taapr_0507, 0)) %>% select(-taapr_0507) %>%
  filter(complete.cases(.)) %>% arrange(-taademand_pct)

#Draw the figure
taademand_pr0507 <-
  ggplot(aes(x = taademand_pct, y = taapr_0507_adj, label = State),
         data = taa_pop_pr) +
  geom_point(size=0.5) +
  geom_text_repel() +
  geom_smooth(se = TRUE, lty = 3) +
  labs(x = "Demand for TAA (%)", y = "Number of Press Releases on TAA (2005-2007)",
       title = "") +
  theme_bw()

taademand_pr0507

ggsave(taademand_pr0507,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS6-2.tiff",
       width=5, height=4)

######Figure S7:TAA-related Press Releases and Import Exposure
#Bring the China shock data
workfile <-
  read.dta13(file = "~/Dropbox/TAAChinaShock/dta/chinashock2013_data/Public Release Data/dta/workfile_china.dta")
cw <-
  read.csv("~/Dropbox/TAAChinaShock/csv/icpsr_statecode_abb.csv",
           as.is = T)

workfile2 <-
  workfile %>% select(czone, statefip, yr, d_tradeusch_pw) %>%
  spread(yr, d_tradeusch_pw)
names(workfile2)[3] <- "d_tradeusch_pw_90"
names(workfile2)[4] <- "d_tradeusch_pw_00"
workfile2 <- workfile2 %>%
  mutate(d_tradeusch_pw_0090 = d_tradeusch_pw_00 - d_tradeusch_pw_90)

fips <- unique(data.frame(state.fips) %>% select(fips, abb))
workfile2 <- workfile2 %>% left_join(fips, by = c("statefip" = "fips"))

#Let's weight by the cz population
czpop <- read.csv("~/Dropbox/TAAChinaShock/csv/cz2000_DOA.csv")
czpop <-
  unique(czpop %>% select(czid_1990, cz_ppl_2000_doa)) %>% filter(complete.cases(.))
czpop$cz_ppl_2000_doa <-
  as.numeric(gsub(",", "", czpop$cz_ppl_2000_doa))

#The data is basedo n ppl in cz_2000, so let's combine in case cz in 1990 was split into two
czpop_adj <- czpop %>% group_by(czid_1990) %>%
  summarise(cz_ppl_1990 = sum(cz_ppl_2000_doa))

#There are redundant values (the original dataset was at the country level, not cz level)
workfile2 <-
  workfile2 %>% left_join(czpop_adj, by = c("czone" = "czid_1990"))

#Let's calculate the pct of cz population over state population
wf2byst <- workfile2 %>% group_by(abb) %>%
  summarise(stateppl = sum(cz_ppl_1990, na.rm = T))

workfile2 <- workfile2 %>% left_join(wf2byst, by = c("abb"))
workfile2 <- workfile2 %>% mutate(czppl_ratio = cz_ppl_1990 / stateppl,
                                  cshock_czppl_weighted = d_tradeusch_pw_0090 *
                                    czppl_ratio)

workfile2 <- workfile2 %>% arrange(-d_tradeusch_pw_0090)

csbyst <- workfile2 %>% group_by(abb) %>%
  summarise(
    nczone = n(),
    cshock_state_weighted = sum(cshock_czppl_weighted, na.rm = T),
    cshock_state = mean(d_tradeusch_pw_0090)
  ) %>%
  mutate(
    cshock_state_rank = rank(-cshock_state),
    cshock_state_weighted_rank = rank(-cshock_state_weighted)
  ) %>%
  arrange(cshock_state_weighted_rank)

taa_pop_pr_cs <-
  taa_pop_pr %>% left_join(csbyst, by = c("State" = "abb"))

#Join the original dataset
taa_pop_pr_cs <-
  taa_pop_pr_cs %>% left_join(pr0507_taastate, by = c("State" = "abb"))

taa_pop_pr_cs <- taa_pop_pr_cs %>%
  mutate(taapr_0507_adj = ifelse(!is.na(taapr_0507), taapr_0507, 0))
taa_pop_pr_cs <- taa_pop_pr_cs %>% arrange(-taapr_0507_adj)

#Draw the figure
cshock_pr0507 <-
  ggplot(aes(x = cshock_state_weighted, y = taapr_0507, label = State),
         data = taa_pop_pr_cs) +
  geom_point(size=0.5) +
  geom_text_repel(hjust = 0, nudge_x = 0.05) +
  geom_smooth(se = TRUE, lty = 3) +
  labs(x = "Import Exposure", y = "Number of Press Releases on TAA (2005-2007)",
       title = "") +
  theme_bw()


ggsave(cshock_pr0507,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS7.tiff",
       width=5, height=4)


######Figure S8:Density Plots of the China Shock Measure and Instrument
cshock_vote <-
  read.csv("czone_cshock_taa_voting.csv")

cshockvote_pre <- cshock_vote %>% filter(t2 == 0)
cshockvote_post <- cshock_vote %>% filter(t2 == 1)

###Get the mean value
cs_pre_mean <- mean(cshockvote_pre$d_tradeusch_pw)
otch_pre_mean <- mean(cshockvote_pre$d_tradeotch_pw_lag)
cs_post_mean <- mean(cshockvote_post$d_tradeusch_pw)
otch_post_mean <- mean(cshockvote_post$d_tradeotch_pw_lag)

pre_us <-
  ggplot(cshockvote_pre, aes(x = d_tradeusch_pw)) + geom_density() +
  geom_vline(
    xintercept = cs_pre_mean,
    linetype = "dashed",
    color = "blue",
    size = 0.5
  ) +
  labs(x="Import penetration", 
       title = "China Shock, pre-2000") +
  ylab("") + theme_bw()


pre_others <-
  ggplot(cshockvote_pre, aes(x = d_tradeotch_pw_lag)) + geom_density() +
  geom_vline(
    xintercept = otch_pre_mean,
    linetype = "dashed",
    color = "blue",
    size = 0.5
  ) +
  labs(x="Import penetration", 
       title = "China Shock \n Instrument, pre-2000") +
  ylab("") + theme_bw() + theme(plot.title = element_text(size = 12))


post_us <-
  ggplot(cshockvote_post, aes(x = d_tradeusch_pw)) + geom_density() +
  geom_vline(
    xintercept = cs_post_mean,
    linetype = "dashed",
    color = "blue",
    size = 0.5
  ) +
  labs(x="Import penetration", 
       title = "China Shock, post-2000") +
  ylab("") + theme_bw()


post_others <-
  ggplot(cshockvote_post, aes(x = d_tradeotch_pw_lag)) + geom_density() +
  geom_vline(
    xintercept = otch_post_mean,
    linetype = "dashed",
    color = "blue",
    size = 0.5
  ) +
  labs(x="Import penetration", 
       title = "China Shock \n Instrument, post-2000") +
  ylab("") + theme_bw() + theme(plot.title = element_text(size = 12))

chinashock_distribution <- ggarrange(pre_us, post_us,
                                     pre_others, post_others,
                                     ncol = 2, nrow = 2)

ggsave(chinashock_distribution, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS8.tiff",
       width=5, height=5)


######Figure S9: Ten-year Change in the Number of Certified Petitions, by CZ
taazipcz_vote <-
  read.csv(
    "~/Dropbox/TAAChinaShock/csv/taazipcz_vote.csv",
    as.is = T,
    na.strings = c("", "NA")
  )

#Calculate just the difference in differences (did_certiifed, did_certified_wrks) as well as the adjusted difference in differences(did_certified_adj, did_certified_wrks_adj)
taa <- taazipcz_vote %>%
  filter(D_year == 1990 | D_year == 2000 | D_year == 2007) %>%
  mutate(certified_wrks = as.numeric(ifelse(certified == 1, Est..No..Workers, 0)))

taa$Est..No..Workers <- as.numeric(taa$Est..No..Workers)

bycz <- taa %>% group_by(czid_1990, D_year) %>%
  summarise(
    total = n(),
    numwrks = sum(as.numeric(Est..No..Workers), na.rm = T),
    certified = sum(certified),
    certified_numwrks = sum(certified_wrks, na.rm = T),
    leniency = certified / total * 100
  )

#Extract the distinct list of CZs from the China Shock dataset: 722 CZs (same as ADH, Kim and Pelc)
czlist <- workfile2 %>% select(czone)
czlist <- distinct(czlist)

########Rearrange the dataset at the level of cz
#The 1990 data
bycz_1990 <- bycz %>%
  filter(D_year == 1990) %>%
  select(czid_1990, total, numwrks, certified, certified_numwrks) %>%
  rename(
    total90 = total,
    numwrks90 = numwrks,
    certified90 = certified,
    certified_numwrks90 = certified_numwrks
  )

#Fill in CZs with zero TAA usage with zeros
bycz_1990_2 <- czlist %>%
  left_join(bycz_1990, by = c("czone" = "czid_1990")) %>%
  mutate_at(c(2:5), ~ replace(., is.na(.), 0))

#The 2000 data
bycz_2000 <- bycz %>% filter(D_year == 2000) %>%
  select(czid_1990, total, numwrks, certified, certified_numwrks) %>%
  rename(
    total00 = total,
    numwrks00 = numwrks,
    certified00 = certified,
    certified_numwrks00 = certified_numwrks
  )

#Fill in CZs with zero TAA usage with zeros
bycz_2000_2 <- czlist %>%
  left_join(bycz_2000, by = c("czone" = "czid_1990")) %>%
  mutate_at(c(2:5), ~ replace(., is.na(.), 0))

#The 2007 data
bycz_2007 <- bycz %>% filter(D_year == 2007) %>%
  select(czid_1990, total, numwrks, certified, certified_numwrks) %>%
  rename(
    total07 = total,
    numwrks07 = numwrks,
    certified07 = certified,
    certified_numwrks07 = certified_numwrks
  )

#Fill in CZs with zero TAA usage with zeros
bycz_2007_2 <- czlist %>%
  left_join(bycz_2007, by = c("czone" = "czid_1990")) %>%
  mutate_at(c(2:5), ~ replace(., is.na(.), 0))

#Calculate the difference in differences (Based on Kim and Pelc 2021)
bycz_d <-
  bycz_1990_2 %>% left_join(bycz_2000_2) %>% left_join(bycz_2007_2) %>%
  mutate(
    d_cert_0090 = certified00 - certified90,
    d_cert_0700 = certified07 - certified00,
    d_cert_wrks_0090 = certified_numwrks00 - certified_numwrks90,
    d_cert_wrks_0700 = certified_numwrks07 - certified_numwrks00,
    did_cert = d_cert_0700 - d_cert_0090,
    did_cert_wrks = d_cert_wrks_0700 - d_cert_wrks_0090,
    did_cert_adj = d_cert_0700 - d_cert_0090 * 10 / 7,
    did_cert_wrks_adj = d_cert_wrks_0700 - d_cert_wrks_0090 * 10 /
      7
  )

###Now let's see the distribution of DID DVs
histogram_d_cert_petition <-
  ggplot(data = bycz_d, aes(x = did_cert)) +
  geom_histogram(binwidth = 1,
                 color = "black",
                 fill = "skyblue") + theme_bw() +
  labs(x = "Changes in the number of certified petitions",
       y = "Count of CZs",
       title = "722 CZs, Bin Width = 1")

ggsave(histogram_d_cert_petition,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS9.tiff",
       width=5, height=4)
  
  
######Figure S10: Ten-year Change in the Number of Certified Workers, by CZ
histogram_d_cert_workers <-
  ggplot(data = bycz_d, aes(x = did_cert_wrks)) +
  geom_histogram(binwidth = 100,
                 color = "black",
                 fill = "orange") + theme_bw() +
  labs(x = "Changes in the number of certified workers",
       y = "Count of CZs",
       title = "722 CZs, Bin Width= 100")

ggsave(histogram_d_cert_workers,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS10.tiff",
       width=5, height=4)

######S11: Decade-average Change in the Number of Certified Petitions, by CZ
###Calculate the decade average (1990 - 2007)
taa <- taazipcz_vote %>%
  filter(D_year > 1989 & D_year < 2008) %>%
  mutate(period2 = ifelse(D_year > 1999, 1, 0),
         certified_wrks = as.numeric(ifelse(certified == 1, Est..No..Workers, 0)))

taa_p1 <- taa %>% filter(period2 == 0)
taa_p2 <- taa %>% filter(period2 == 1)

bycz_p1 <- taa_p1 %>% group_by(czid_1990) %>%
  summarise(
    mcertified_p1 = sum(certified, na.rm = T) / 10,
    mwrks_p1 = sum(certified_wrks, na.rm = T) / 10
  )

bycz_p1_adj <- czlist %>%
  left_join(bycz_p1, by = c("czone" = "czid_1990")) %>%
  mutate_at(c(2:3), ~ replace(., is.na(.), 0))

bycz_p2 <- taa_p2 %>% group_by(czid_1990) %>%
  summarise(
    mcertified_p2 = sum(certified, na.rm = T) / 8,
    mwrks_p2 = sum(certified_wrks, na.rm = T) / 8
  )

bycz_p2_adj <- czlist %>%
  left_join(bycz_p2, by = c("czone" = "czid_1990")) %>%
  mutate_at(c(2:3), ~ replace(., is.na(.), 0))

bycz_p1p2 <- bycz_p1_adj %>%
  left_join(bycz_p2_adj) %>%
  mutate(d_mcertified = mcertified_p2 - mcertified_p1,
         d_mwrks = mwrks_p2 - mwrks_p1)

###Now let's see the distribution of the changes in TAA distribution
histogram_d_cert_petition_av <-
  ggplot(data = bycz_p1p2, aes(x = d_mcertified)) +
  geom_histogram(binwidth = 1,
                 color = "black",
                 fill = "skyblue") + theme_bw() +
  labs(x = "Changes in the number of certified petitions",
       y = "Count of CZs",
       title = "722 CZs, Bin Width = 1")

ggsave(histogram_d_cert_petition_av,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS11.tiff",
       width=5, height=4)

######Figure S12: Decade-average Change in the Number of Certified Workers, by CZ
histogram_d_cert_workers_av <-
  ggplot(data = bycz_p1p2, aes(x = d_mwrks)) +
  geom_histogram(binwidth = 100,
                 color = "black",
                 fill = "orange") + theme_bw() +
  labs(x = "Changes in the number of certified workers",
       y = "Count of CZs",
       title = "722 CZs, Bin Width= 100")

ggsave(histogram_d_cert_workers_av,
     file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS12.tiff",
     width=5, height=4)

######Figure S15: Distribution of Instrumented TAA Certification Rate, CZ-level
taazipcz_cut <- taazipcz_vote%>%
  select(D_year, untenured, certified, Det.Date, Cert.Officer, czid_1990)%>%
  filter(Det.Date > as.Date("2004-11-02") & Det.Date < as.Date("2008-11-04"))

###46 NAs out of 9636 observations (0.005%)
##eliminate the NAs (left with 9035 observations)
taazipcz_cut<- taazipcz_cut%>%
  filter(!is.na(certified)&!is.na(Cert.Officer)&!is.na(czid_1990))

#####Jackknife IV estimation
###First Iteration
##First observation has the cz(czid_1990) of 29502
head(taazipcz_cut$czid_1990)
#Step1: Run the regression with observations with non-15000 cz
first <-
  glm(certified ~ Cert.Officer, data = taazipcz_cut[taazipcz_cut$czid_1990 !=
                                                      29502, ], family = binomial)
#Step2: Create a dataset only with the first observation
firstdf <- taazipcz_cut[1, ]
#Step3: Calaulte the predicted value of the observation in Step2 dataset, with the model in Step1
firstdf$predictedcertified <-
  predict(first, newdata = firstdf, type = "response")
#Step4: Repeat 7313 times (the total observations of taazipcz_cut)

#For loop
taaid <- c(2:nrow(taazipcz_cut))

# initialize df
df = firstdf

for (i in taaid) {
  cz <- taazipcz_cut[i,]$czid_1990
  
  model <-
    glm(certified ~ Cert.Officer,
        data = taazipcz_cut[taazipcz_cut$czid_1990 != cz, ],
        family = binomial)
  
  this_df <- taazipcz_cut[i, ]
  
  this_df$predictedcertified <-
    predict(model, newdata = this_df, type = "response")
  
  df <- rbind(df, this_df)
}

bycz <- df %>%
  group_by(czid_1990) %>%
  summarise(
    pcert = mean(predictedcertified * 100, na.rm = T),
    actualcert = mean(certified *
                        100, na.rm = T),
    totalpetitions = n()
  ) %>% na.omit()

###Population sd of bycz
sqrt((505 - 1) / 505) * sd(bycz$pcert)

###Draw the distribution of the instrumented TAA certification rate
pcert_distribution <-
  ggplot(data = bycz, aes(x = pcert)) + 
  geom_histogram(binwidth = 0.1, colour = "black", fill = "white") +
  theme_bw() + labs(
    x = "Predicted TAA certification rate, CZ-level",
    y = "Counts",
    title = "CZ-level, N=505, Bin width=0.1"
  )

ggsave(pcert_distribution, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS15.tiff",
       width=5, height=4)

######Figure S16: Testing the Consternation Effect with an Imputation
#Testing the consternation with the imputed dataset
master_adj <- read.csv("master_imputed.csv", as.is = T)

master_adj$ipw_us2 <- as.factor(master_adj$ipw_us2)
levels(master_adj$ipw_us2) <- c("Low", "High")
master_adj$ipw_others2 <- as.factor(master_adj$ipw_others2)
levels(master_adj$ipw_others2) <- c("Low", "High")

master_adj <-
  master_adj %>% select(
    d_rep_0800,
    d_tradeusch_pw,
    d_tradeotch_pw_lag,
    ipw_us2,
    ipw_others2,
    l_shind_manuf_cbp,
    l_sh_routine33,
    l_task_outsource,
    l_sh_empl_f,
    l_sh_popedu_c,
    l_sh_popfborn,
    region,
    Midwest,
    Northeast,
    South,
    West,
    pcert_adj,
    t2,
    votetotal_2000,
    actcert_adj
  ) %>% na.omit()

model_int_b_imputed <- ivreg(
  d_rep_0800 ~ ipw_us2 * pcert_adj + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region |
    ipw_others2 * pcert_adj + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region,
  weights = votetotal_2000,
  data = master_adj
)

#Calculate the fitted value
ivdf_adj <- master_adj %>%
  mutate(ivfitted_adj = model_int_b_imputed$fitted)

it2_adj <- ggplot(data = ivdf_adj, aes(x = pcert_adj, y = ivfitted_adj))

itplot2_adj <- it2_adj + geom_jitter(alpha = .1, width = .1) +
  stat_smooth(
    method = "lm",
    level = 0.99,
    fullrange = T,
    size = 0.3
  ) +
  ylim(-10, 5) +
  facet_grid(. ~ ipw_us2) + theme_classic() +
  labs(title = "") +
  xlab("Predicted TAA Certification Rate (Imputed)") +
  ylab("Changes in Republican Party Vote Share")

ggsave(itplot2_adj,
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS16.tiff",
       width=5, height=4)

######Figure S17: The Marginal Effect Plot by Import Exposure
master <- read.csv("master.csv", as.is = T)

model_int <- ivreg(
  d_rep_0800 ~ d_tradeusch_pw * pcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region |
    d_tradeotch_pw_lag * pcert + t2 +
    l_shind_manuf_cbp + l_sh_routine33 +  l_task_outsource +
    l_sh_empl_f + l_sh_popedu_c + l_sh_popfborn +
    region,
  weights = votetotal_2000,
  data = master
)

cme_plot <-
  plot_cme(model_int,
           effect = "pcert",
           condition = c("d_tradeusch_pw"))

cme_plot2 <- cme_plot +
  labs(x = "Import Exposure", y = "The Marginal Effect of TAA on the Election",
       title = "")

cme_plot2_rug <- cme_plot2 +
  geom_rug(
    data = master,
    aes(x = d_tradeusch_pw),
    sides = "b",
    alpha = .05) + theme_bw()

ggsave(cme_plot2_rug, 
       file="~/Dropbox/TAAChinaShock/World Politics/replication/figures_replicated/figureS17.tiff",
       width=5, height=4)
